# Dickstein, Ho, and Mark (2023)
library(tidyr)
library(readr)
library(dplyr)
library(data.table)
library(knitr)
library(ggplot2)
library(readxl)
library(numDeriv)
library(stringr)
library(yaml)
library(gtools)
library(haven)
try(library(Hmisc))
library(foreign)
try(library(EnvStats))

get_diags <- function(data, output_path) {

  solution_csv <- read_csv(paste0(output_path,  "/solution.csv"))
  covar_names_csv <- read_csv(paste0(output_path, "/covar_names.csv"))
  covar_lens_csv <- read_csv(paste0(output_path,  "/covar_lens.csv"))

  solution <- as.numeric(solution_csv[,3:ncol(solution_csv)][1,])
  covar_names <- covar_names_csv[['x']]
  covar_lens <- covar_lens_csv[['x']]

  param_data <- parametrize_choice_data(
    solution,
    data,
    covar_names,
    covar_lens,
    score=TRUE)
  set.seed(4197169)
  param_data <- param_data %>%
    mutate(
      lambda = rexp(n(), rate=alpha_i),
      simcost=(x_av+omega_i*x_av^2)*lambda,
      wsimcost=simcost*(simcost < 89.37073) + 89.37073*(simcost > 89.37073),
      wlambda=lambda*(lambda < 89.37073) + 89.37073*(lambda > 89.37073))
  param_data['sim_eps'] <- revd(nrow(param_data))
  param_data['sim_util'] <- param_data['delta_i'] + param_data['sim_eps']
  param_data <- param_data %>%
    group_by(subscriberid_year) %>%
    mutate(sim_choice = as.numeric(sim_util == max(sim_util))) %>%
    ungroup()
  obs_data <- param_data %>% filter(choice == 1 | sim_choice == 1)

  save(obs_data, file=paste0(output_path, "/diag.rda"))
  write.dta(obs_data, file=paste0(output_path, "/diag.dta"))
  save(param_data, file=paste0(output_path, "/diag_exploded.rda"))
  write.csv(param_data, file=paste0(output_path, "/diag_exploded.csv"))
}

get_se <- function(data, output_path, cost_censor) {

  solution_csv <- read_csv(paste0(output_path, "/solution.csv"))
  covar_names_csv <- read_csv(paste0(output_path, "/covar_names.csv"))
  covar_lens_csv <- read_csv(paste0(output_path, "/covar_lens.csv"))


  solution <- as.numeric(solution_csv[,3:ncol(solution_csv)][1,])
  covar_names <- covar_names_csv[['x']]
  covar_lens <- covar_lens_csv[['x']]
  cost_covar_lens <- covar_lens[1:2]
  cost_covar_names <- covar_names[1:sum(cost_covar_lens)]

  combined_llh <- get_combined_llh(
    theta=solution,
    data=data,
    covar_names=covar_names,
    covar_lens=covar_lens,
    cost_covar_names=cost_covar_names,
    cost_covar_lens=cost_covar_lens,
    gradient=TRUE,
    scores=TRUE,
    fixed_cost_censor=cost_censor)

  choice_score <- combined_llh$choice_score
  cost_score <- combined_llh$cost_score
  names(cost_score) <- sub("cost_", "", names(cost_score))
  
  # Generate 0 columns over at cost_score
  missing_scores <- names(choice_score)[!(names(choice_score) %in% names(cost_score))]
  for (x in missing_scores) {
    cost_score[x] <- 0
  }
  cost_score[is.na(cost_score)] <- 0
  combined_score <- as.matrix(choice_score + cost_score)

  # Remove the column that is just zero as it's not being estimated
  if (covar_names[length(covar_names)] == "zero") {
    combined_score = combined_score[ , !colnames(combined_score) %in% c("score_Z_1")]

  }

  num_obs <- nrow(data %>% filter(choice == 1))
  info_matrix <- (t(combined_score) %*% combined_score) / num_obs
  inverse_info_matrix <- solve(info_matrix, tol = 1e-30)
  varcov <- inverse_info_matrix / num_obs
  standard_errors <- sqrt(diag(varcov))

  if (covar_names[length(covar_names)] == "zero") {
    standard_errors['zero'] <- 0
  }

  total_n = nrow(data %>% filter(choice == 1))
  insured_n = nrow(data %>% filter(choice == 1 & insurance == 1))
  setNames(standard_errors, covar_names)
  output <- as.data.frame(
    rbind(
      cbind(
        covar_names,
        t(rbind(solution, standard_errors))),
      t(c("N", total_n, insured_n)))
  )

  # Prepare file path
  write_csv(output, paste0(output_path, "/se.csv"))
  write_csv(as.data.frame(varcov), paste0(output_path, "/varcov.csv"))
}

